from stanfordcorenlp import StanfordCoreNLP
import pickle

# --- Chinese vocabulary -----------------------------------------------------
# Load the pre-split Chinese corpus, collect its distinct items and persist
# two inverse lookup tables (index -> item and item -> index).
# NOTE(review): pickle.load executes arbitrary code from the file; only run
# this on a 'zh_split.pickle' that comes from a trusted source.
with open('zh_split.pickle', 'rb') as f:
    zh_vocab = pickle.load(f)

# Each element of zh_vocab is itself iterated and its members added to the
# set -- presumably one token sequence per sentence; confirm with the
# producer of the pickle.
s = set()
for i in zh_vocab:
    s.update(i)

# The list order is whatever order the set iterates in, so the concrete
# indices are arbitrary (for str items they can differ between interpreter
# runs). Both mappings are derived from this same list, so they are always
# exact inverses of each other.
zh = list(s)

zh_index2word = {i: w for i, w in enumerate(zh)}
zh_word2index = {w: i for i, w in enumerate(zh)}

# Write each table inside its own `with` block so the handle is closed even
# if pickle.dump raises, and so an output file is only created/truncated
# right before it is written.
with open("zh_word2index.pickle", "wb") as f2:
    pickle.dump(zh_word2index, f2)

with open("zh_index2word.pickle", "wb") as f1:
    pickle.dump(zh_index2word, f1)

# --- English vocabulary -----------------------------------------------------
# Load the pre-split English corpus, collect its distinct items and persist
# two inverse lookup tables (index -> item and item -> index).
# NOTE(review): pickle.load executes arbitrary code from the file; only run
# this on an 'en_split.pickle' that comes from a trusted source.
with open('en_split.pickle', 'rb') as f:
    en_vocab = pickle.load(f)

# Each element of en_vocab is itself iterated and its members added to the
# set -- presumably one token sequence per sentence; confirm with the
# producer of the pickle.
s = set()
for i in en_vocab:
    s.update(i)

# The list order is whatever order the set iterates in, so the concrete
# indices are arbitrary (for str items they can differ between interpreter
# runs). Both mappings are derived from this same list, so they are always
# exact inverses of each other.
en = list(s)

en_index2word = {i: w for i, w in enumerate(en)}
en_word2index = {w: i for i, w in enumerate(en)}

# Write each table inside its own `with` block so the handle is closed even
# if pickle.dump raises, and so an output file is only created/truncated
# right before it is written.
with open("en_word2index.pickle", "wb") as f4:
    pickle.dump(en_word2index, f4)

with open("en_index2word.pickle", "wb") as f3:
    pickle.dump(en_index2word, f3)